####### one-sample tests (prefs1AB)
prefs1AB = read.csv("chapter5/prefs1AB.csv")
t.test(prefs1AB$email_preference == "A-mail", mu=0.5)

# chi-squared test
email_preferences = xtabs( ~ email_preference, data=prefs1AB)
chisq.test(email_preferences)

#binomial test
binom.test(email_preferences)



####### one sample, >3 levels of response tests (prefs1ABC)

#multinomial test
library(XNomial)
prefs1ABC = read.csv("chapter5/prefs1ABC.csv")
email_preferences = xtabs( ~ email_preference, data=prefs1ABC)
xmulti(email_preferences, c(1/3, 1/3, 1/3), stat="Prob")

#post hoc binomial
a_test = binom.test(sum(prefs1ABC$email_preference == "A-mail"), nrow(prefs1ABC), p=1/3)
b_test = binom.test(sum(prefs1ABC$email_preference == "B-mail"), nrow(prefs1ABC), p=1/3)
c_test = binom.test(sum(prefs1ABC$email_preference == "C-mail"), nrow(prefs1ABC), p=1/3)
p.adjust(c(a_test$p.value, b_test$p.value, c_test$p.value))

#chisquared test 
email_preferences = xtabs( ~ email_preference, data=prefs1ABC)
chisq.test(email_preferences)



######## N-sample chi square (prefs2ABC)

#chi-square
prefs2ABC = read.csv("chapter5/prefs2ABC.csv")
#we specify multiple factors in the xtabs formula to get 
#crosstabs of higher dimensions
email_preferences = xtabs( ~ email_preference + team, data=prefs2ABC)
chisq.test(email_preferences)



###### single factor tests (salesXY)

## anova assumptions

# shapiro-wilk
salesXY = read.csv("chapter5/salesXY.csv")
shapiro.test(salesXY[salesXY$team == "X",]$sales)
shapiro.test(salesXY[salesXY$team == "Y",]$sales)

#anderson-darling test
library(nortest)
ad.test(salesXY[salesXY$team == "X",]$sales)
ad.test(salesXY[salesXY$team == "Y",]$sales)

#ks test
lillie.test(salesXY[salesXY$team == "X",]$sales)
lillie.test(salesXY[salesXY$team == "Y",]$sales)

#levene's test
library(car)
leveneTest(sales ~ team, data=salesXY)

## single factor tests 

#t test
t.test(sales ~ team, data=salesXY)

#median test
library(coin)

median_test(sales ~ team, data=salesXY, dist="exact")
#this appears to give the same results as:
# fisher.test(xtabs(cbind(sales > median(sales), sales <= median(sales)) ~ team, data=salesXY))

#wilcox
library(coin)
wilcox_test(sales ~ team, data=salesXY, dist="exact")




####### anova, one-way 3 levels
salesXYZ = read.csv("chapter5/salesXYZ.csv")

summary(aov(sales ~ team, data=salesXYZ))


########kruskal wallis
library(coin)
kruskal.test(sales ~ team, data=salesXYZ)




#########single factor within-subjects

salesYY = read.csv("chapter5/salesYY.csv")

#paired t test
library(reshape2)	#for dcast
#for paired.t.test we must use a wide-format table. Most functions
#in R do not require a wide-format table, but the dcast function
#offers a quick way to translate long-format into wide-format when
#we do need it.
salesYY_wide = dcast(salesYY, subject ~ watch, value.var="sales")
t.test(salesYY_wide$pre, salesYY_wide$post, paired=TRUE)

#sign test (manually)
post_sales_greater = xtabs( ~ post > pre, data=salesYY_wide)
binom.test(post_sales_greater)

#paired wilcox
wilcoxsign_test(sales ~ watch | subject, data=salesYY, dist="exact")



######### single factor within-subjects, three levels

####one-way repeated measures ANOVA
salesYY2 = read.csv("chapter5/salesYY2.csv")

library(ez)
m = ezANOVA(dv=sales, within=watch, wid=subject, data=salesYY2)
m$Mauchly
m$ANOVA

###friedman
library(coin)
friedman_test(sales ~ watch | subject, data=salesYY2)

library(plyr)
# get all pairwise combinations of levels of the watch factor
# this is equivalent to combn(levels(salesYY2$watch), 2, simplify=FALSE)
comparisons = list(c("none", "one"), c("none", "two"), c("one", "two"))
# run wilcox signed rank tests on each pair of levels, collecting 
# the test statistic and the p value into a single table
post_hoc_tests = ldply(comparisons, function(watch_levels) {
    wt = wilcoxsign_test(sales ~ factor(watch) | subject, 
        data=salesYY2[salesYY2$watch %in% watch_levels,],
        dist="exact")
    data.frame(
        comparison = paste(watch_levels, collapse=" - "),
        z = statistic(wt),
        pvalue = pvalue(wt) 
    )
})
# derive adjusted p values using Holm-Bonferroni method
post_hoc_tests$adjusted_pvalue = p.adjust(post_hoc_tests$pvalue) 
post_hoc_tests

ddply(salesYY2, ~ watch, function(data) summary(data$sales))




########### multifactor tests

#### mixed anova
salesYY2city = read.csv("chapter5/salesYY2city.csv")

m = aov(sales ~ watch * city + Error(subject/watch), data=salesYY2city)
summary(m)

m = ezANOVA(dv=sales, between=city, within=watch, wid=subject, data=salesYY2city)
m$Mauchly
m$ANOVA

# interaction plot
with(salesYY2city, interaction.plot(watch, city, sales))


#art version
library(ARTool)
library(lsmeans)
m = art(sales ~ watch * city + (1|subject), data=salesYY2city)
anova(m)
#pairwise post hoc tests
lsmeans(artlm(m, "watch"), pairwise ~ watch)


############ multinomial regression #1
prefs2ABC = read.csv("chapter5/prefs2ABC.csv")
library(nnet)
library(car)
m = multinom(email_preference ~ team, data=prefs2ABC)
Anova(m)


############ multinomial regression #2
prefs2ABCsex = read.csv("chapter5/prefs2ABCsex.csv")
library(nnet)
library(car)
contrasts(prefs2ABCsex$team) = "contr.sum"
contrasts(prefs2ABCsex$sex) = "contr.sum"
m = multinom(email_preference ~ team * sex, data=prefs2ABCsex)
Anova(m, type=3)




############# ordinal regression

prefs2ABClove = read.csv("chapter5/prefs2ABClove.csv")
library(MASS)
library(car)
contrasts(prefs2ABClove$team) = "contr.sum"
contrasts(prefs2ABClove$sex) = "contr.sum"
# transform numeric variable into an ordinal variable
prefs2ABClove$love = ordered(prefs2ABClove$love)
# run regression and analysis of variance
m = polr(love ~ team * sex, data=prefs2ABClove)
Anova(m, type=3)


########### Poisson regression

prefs2ABClate = read.csv("chapter5/prefs2ABClate.csv")
contrasts(prefs2ABClate$team) = "contr.sum"
contrasts(prefs2ABClate$sex) = "contr.sum"
contrasts(prefs2ABClate$email_preference) = "contr.sum"
m = glm(late_responses ~ team * sex * email_preference, data=prefs2ABClate, family=quasipoisson)
Anova(m, type=3)

#summary stats
ddply(prefs2ABClate, ~ team, function(data) summary(data$late_responses))

#differences estimated by the model
library(multcomp)
library(lsmeans)
team_effect = confint(glht(m, lsm(pairwise ~ team)))
team_effect
exp(team_effect$confint)


#differences estimated by the model, take 2
library(lsmeans)
lsmeans(m, pairwise ~ team, type="response")


################ Gamma regression

salesXYsex = read.csv("chapter5/salesXYsex.csv")
contrasts(salesXYsex$team) = "contr.sum"
contrasts(salesXYsex$sex) = "contr.sum"
m = glm(sales ~ team * sex, data=salesXYsex, family=Gamma(link=log))
Anova(m, type=3)




################ GLMM Gamma

salesYY2city = read.csv("chapter5/salesYY2city.csv")
library(lme4)
library(lmerTest)
library(car)
contrasts(salesYY2city$city) = "contr.sum"
contrasts(salesYY2city$watch) = "contr.sum"
m = glmer(sales ~ city * watch + (1|subject), data=salesYY2city, family=Gamma(link=log))
Anova(m, type=3)


################# GEEE Gamma
salesYY2city = read.csv("chapter5/salesYY2city.csv")
library(geepack)
# geeglm requires data sorted by grouping variable, so we sort
# by subject (so that all rows for a given subject are
# contiguous)
salesYY2city = salesYY2city[order(salesYY2city$subject),] 
m = geeglm(sales ~ city * watch, id=subject, data=salesYY2city, family=Gamma(link=log))
anova(m)
